# Fix the RNG seed so the simulation is reproducible. set.seed() interprets
# its argument as an integer, so pass an integer literal rather than relying
# on implicit coercion from a character string.
set.seed(260823L)

# Load project code. The paths are relative, so the script must be run from
# the directory that contains Simulation_model_code/.
# NOTE(review): simulate_inbreeding(), used further down, is presumably
# defined in one of these files -- confirm.
source("Simulation_model_code/estimate_relatedness.R")
source("Simulation_model_code/simulation_model.R")

# Create the output directories if they do not exist yet. recursive = TRUE
# means each path can be created regardless of the order of the entries.
invisible(lapply(
  c(
    "Numerical_results",
    "Numerical_results/Figures",
    "Numerical_results/Simulated_data"
  ),
  function(d) {
    if (!dir.exists(d)) dir.create(d, recursive = TRUE)
  }
))

# When TRUE, results are regenerated even if a cached file already exists.
RUN_ALL <- TRUE

# Simulation settings ----
MAX_CLONAL_CLUSTER <- 4 # subgraph size for relationship graph
N_MARKER_DENSE <- 24000 # number of markers for dense simulated data
N_MARKER_SPARSE <- 200 # number of markers for sparse data (polymorphic, downsampled from dense)
N_LINEAGES <- 100 # number of unrelated founder lineages
N_SAMPLES <- 100 # size of simulated parasite population
N_GENERATIONS <- 10 # number of generations of inbreeding
PROB_COTRANS <- 0.4 # enriched probability of outbreeding within subgraphs
M <- 1000 # average number of consecutive markers inherited from a single parent
P <- 0.9 # expected frequency of major allele in founder population

# Numbers of downsampled (polymorphic) markers used to illustrate the
# HMM vs independence model tradeoff.
N_MARKER_DOWNSAMPLE <- seq(500, 12000, 500)
N_MARKER_DOWNSAMPLE_SUBSET <- c(500, 1000, 1500, 2000, 4000, 8000, 12000)
# Simulate successive generations of inbreeding, capturing dense genotypic data whilst accommodating marker linkage.
# Obtain the dense simulated data set. The cached .rds file is reused only
# when RUN_ALL is FALSE and the file exists; otherwise the simulation is run
# afresh and its result is written to the cache (gzip-compressed).
dense_sim_rds_path <- "Numerical_results/Simulated_data/dense_data_simulation.rds"

if (!RUN_ALL && file.exists(dense_sim_rds_path)) {
  dense_data_sim <- read_rds(dense_sim_rds_path)
} else {
  dense_data_sim <- simulate_inbreeding(
    max_clonal_cluster = MAX_CLONAL_CLUSTER,
    n_markers = N_MARKER_DENSE,
    n_lineages = N_LINEAGES,
    n_samples = N_SAMPLES,
    n_generations = N_GENERATIONS,
    prob_cotrans = PROB_COTRANS,
    M = M,
    p = P
  )
  write_rds(dense_data_sim, dense_sim_rds_path, compress = "gz")
}